#!/usr/bin/env ruby
#
# Copyright (C) 2010-2018  Brazil
# Copyright (C) 2019-2024  Sutou Kouhei <kou@clear-code.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

require "English"

# Path (relative to the current directory) of an optional file with
# extra decompose rules. create_decompose_map reads it when it is
# readable and treats each line as a tab-separated "source<TAB>destination"
# pair.
CUSTOM_RULE_PATH = 'nfkc-custom-rules.txt'

# Writes C functions that are implemented as nested "if"/"switch"
# statements dispatching on the individual bytes of their arguments.
#
# Three functions are emitted, all named with the "grn_nfkc<version>_"
# prefix: "char_type", "decompose" and "compose".
class SwitchGenerator
  # unicode_version:: String interpolated into the generated function
  #                   names.
  # output:: Object that receives the generated code through
  #          puts/print/printf.
  def initialize(unicode_version, output)
    @unicode_version = unicode_version
    @output = output
  end

  # Emits the three functions in order (char type, decompose, compose)
  # and reports progress on STDERR.
  #
  # bc:: Hash of character => char type code.
  # decompose_map:: Hash of source string => decomposed string.
  # compose_map:: Hash of source string => composed string.
  def generate(bc, decompose_map, compose_map)
    STDERR.puts('generating char type code..')
    generate_blockcode_char_type(bc)
    STDERR.puts('generating decompose code..')
    generate_decompose(decompose_map)
    STDERR.puts('generating compose code..')
    generate_compose(compose_map)
  end

  private
  # Emits the "char_type" function: the header, the body produced by
  # gen_bc and a trailing "return -1;".
  def generate_blockcode_char_type(bc)
    @output.puts(<<-HEADER)

grn_char_type
grn_nfkc#{@unicode_version}_char_type(const unsigned char *str)
{
    HEADER

    # @lv is the value that gen_bc interpolates into every "return"
    # it emits; gen_bc updates it while it walks the keys.
    @lv = 0
    gen_bc(bc, 0)

    @output.puts(<<-FOOTER)
  return -1;
}
    FOOTER
  end

  # Recursively emits the code that inspects str[level].
  #
  # hash:: Hash of remaining key bytes => value for the current prefix.
  # level:: Index of the byte inspected at this depth; also used for
  #         the indentation (two spaces per level).
  #
  # The entries are grouped by their first byte. With fewer than three
  # distinct first bytes a chain of "if" statements is emitted,
  # otherwise a "switch". A group whose remaining key bytes are all
  # 0x80 (or empty) does not produce nested code: it only replaces
  # @lv with the group's first value.
  def gen_bc(hash, level)
    bl = ' ' * (level * 2)
    # h2: first byte => {remaining bytes => value}
    h2 = {}
    hash.each{|key,val|
      key = key.dup
      # Work on raw bytes so that key[1..-1] slices bytes, not characters.
      key.force_encoding("ASCII-8BIT")
      head = key.bytes[0]
      rest = key[1..-1]
      if h2[head]
        h2[head][rest] = val
      else
        h2[head] = {rest => val}
      end
    }
    if h2.size < 3
      h2.keys.sort.each{|k|
        if (0x80 < k)
          # Bytes below this key still get the value seen so far.
          @output.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{@lv}; }\n", k)
        end
        h = h2[k]
        if h.keys.join =~ /^\x80*$/n
          # Take the first value of the group as the new current value.
          @lv, = h.values
        else
          @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
          gen_bc(h, level + 1)
          @output.puts bl + '}'
        end
      }
      @output.puts bl + "return #{@lv};"
    else
      @output.puts bl + "switch (str[#{level}]) {"
      # lk: next byte value that has not received a "case" label yet.
      lk = 0x80
      # br: true when the previously emitted case is already terminated
      # (or nothing has been emitted yet), so no "return" is needed
      # before the next label.
      br = true
      h2.keys.sort.each{|k|
        if (lk < k)
          # Emit labels for the byte values skipped since the last key.
          for j in lk..k-1
            @output.printf("#{bl}case 0x%02X :\n", j)
          end
          br = false
        end
        unless br
          @output.puts bl + "  return #{@lv};"
          @output.puts bl + '  break;'
        end
        h = h2[k]
        @output.printf("#{bl}case 0x%02X :\n", k)
        if h.keys.join =~ /^\x80*$/n
          @lv, = h.values
          br = false
        else
          gen_bc(h, level + 1)
          @output.puts bl + '  break;'
          br = true
        end
        lk = k + 1
      }
      @output.puts bl + 'default :'
      @output.puts bl + "  return #{@lv};"
      @output.puts bl + '  break;'
      @output.puts bl + '}'
    end
  end

  # Emits the "decompose" function: the header, the body produced by
  # gen_decompose and a trailing "return 0;".
  def generate_decompose(hash)
    @output.puts(<<-HEADER)

const char *
grn_nfkc#{@unicode_version}_decompose(const unsigned char *str)
{
    HEADER

    gen_decompose(hash, 0)

    @output.puts(<<-FOOTER)
  return 0;
}
    FOOTER
  end

  # Recursively emits the code that inspects str[level].
  #
  # hash:: Hash of remaining source bytes => destination string. An
  #        entry with the key '' means the source is fully consumed.
  # level:: Index of the byte inspected at this depth.
  #
  # Note that the '' entry is deleted from the given hash.
  def gen_decompose(hash, level)
    bl = ' ' * ((level + 0) * 2)
    if hash['']
      # All source bytes matched: return the destination as a C string
      # literal made of \xXX escapes.
      dst = ''
      hash[''].each_byte{|b| dst << format('\x%02X', b)}
      @output.puts "#{bl}return \"#{dst}\";"
      hash.delete('')
    end
    return if hash.empty?
    # h2: first byte => {remaining bytes => destination}
    h2 = {}
    hash.each{|key,val|
      key = key.dup
      key.force_encoding("ASCII-8BIT")
      head = key.bytes[0]
      rest = key[1..-1]
      if h2[head]
        h2[head][rest] = val
      else
        h2[head] = {rest => val}
      end
    }
    if h2.size == 1
      # A single candidate byte: a plain "if" is enough.
      h2.each{|key,val|
        @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
        gen_decompose(val, level + 1)
        @output.puts bl + '}'
      }
    else
      @output.puts "#{bl}switch (str[#{level}]) {"
      h2.keys.sort.each{|k|
        @output.printf("#{bl}case 0x%02X :\n", k)
        gen_decompose(h2[k], level + 1)
        @output.puts("#{bl}  break;")
      }
      @output.puts bl + '}'
    end
  end

  # Emits the "compose" function.
  #
  # The last character of every source pattern is used as the
  # "suffix"; the characters before it are joined and used as the
  # "prefix". A caution is printed for a pattern that does not consist
  # of exactly two characters, but the pattern is still processed.
  def generate_compose(compose_map)
    @output.puts(<<-HEADER)

const char *
grn_nfkc#{@unicode_version}_compose(const unsigned char *prefix, const unsigned char *suffix)
{
    HEADER
    # suffix: suffix character => {prefix string => destination}
    suffix = {}
    compose_map.each{|src,dst|
      chars = src.chars
      if chars.size != 2
        STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
      end
      s = chars.pop
      if suffix[s]
        suffix[s][chars.join] = dst
      else
        suffix[s] = {chars.join=>dst}
      end
    }
    gen_compose_sub(suffix, 0)
    @output.puts(<<-FOOTER)
  return 0;
}
    FOOTER
  end

  # Recursively emits the code that inspects prefix[level].
  #
  # hash:: Hash of remaining prefix bytes => destination string. An
  #        entry with the key '' means the prefix is fully consumed.
  # level:: Index of the prefix byte inspected at this depth.
  # indent:: Extra indentation levels, given by gen_compose_sub so the
  #          code lines up below the suffix checks.
  #
  # Note that the '' entry is deleted from the given hash.
  def gen_compose_sub2(hash, level, indent)
    bl = ' ' * ((level + indent + 0) * 2)
    if hash['']
      # Whole prefix matched: return the destination as \xXX escapes.
      @output.print "#{bl}return \""
      hash[''].each_byte{|b| @output.printf('\x%02X', b)}
      @output.puts "\";"
      hash.delete('')
    end
    return if hash.empty?

    # h2: first byte => {remaining bytes => destination}
    h2 = {}
    hash.each{|key,val|
      key = key.dup
      key.force_encoding("ASCII-8BIT")
      head = key.bytes[0]
      rest = key[1..-1]
      if h2[head]
        h2[head][rest] = val
      else
        h2[head] = {rest => val}
      end
    }

    if h2.size == 1
      h2.each{|key,val|
        @output.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
        gen_compose_sub2(val, level + 1, indent)
        @output.puts bl + '}'
      }
    else
      @output.puts "#{bl}switch (prefix[#{level}]) {"
      h2.keys.sort.each{|k|
        @output.printf("#{bl}case 0x%02X :\n", k)
        gen_compose_sub2(h2[k], level + 1, indent)
        @output.puts("#{bl}  break;")
      }
      @output.puts bl + '}'
    end
  end

  # Recursively emits the code that inspects suffix[level].
  #
  # hash:: Hash of remaining suffix bytes => {prefix => destination}.
  #        An entry with the key '' means the suffix is fully consumed;
  #        its value is handed to gen_compose_sub2 to emit the prefix
  #        checks.
  # level:: Index of the suffix byte inspected at this depth.
  #
  # Note that the '' entry is deleted from the given hash.
  def gen_compose_sub(hash, level)
    bl = ' ' * ((level + 0) * 2)
    if hash['']
      gen_compose_sub2(hash[''], 0, level)
      hash.delete('')
    end
    return if hash.empty?
    # h2: first byte => {remaining bytes => prefix map}
    h2 = {}
    hash.each{|key,val|
      key = key.dup
      key.force_encoding("ASCII-8BIT")
      head = key.bytes[0]
      rest = key[1..-1]
      if h2[head]
        h2[head][rest] = val
      else
        h2[head] = {rest => val}
      end
    }
    if h2.size == 1
      h2.each{|key,val|
        @output.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
        gen_compose_sub(val, level + 1)
        @output.puts bl + '}'
      }
    else
      @output.puts "#{bl}switch (suffix[#{level}]) {"
      h2.keys.sort.each{|k|
        @output.printf("#{bl}case 0x%02X :\n", k)
        gen_compose_sub(h2[k], level + 1)
        @output.puts("#{bl}  break;")
      }
      @output.puts bl + '}'
    end
  end
end

# Variant of SwitchGenerator that overrides the three generate_*
# methods. Characters are grouped by all of their bytes except the
# last one ("common bytes"); the generated C code switches on the
# common bytes and then looks the last byte up in a static table
# (or returns a constant when every value in the range is the same).
class TableGenerator < SwitchGenerator
  private
  # Prefix shared by every generated identifier.
  def name_prefix
    "grn_nfkc#{@unicode_version}_"
  end

  # Name of the static table for the group identified by common_bytes
  # (an Array of Integers, rendered as lower-case hex).
  def table_name(type, common_bytes)
    suffix = common_bytes.collect {|byte| "%02x" % byte}.join("")
    "#{name_prefix}#{type}_table_#{suffix}"
  end

  # Name of the generated function for the given type.
  def function_name(type)
    "#{name_prefix}#{type}"
  end

  # Emits one static table per group of characters.
  #
  # type:: Used to build the table names.
  # return_type:: C type of the table elements.
  # byte_size_groups:: Hash of common bytes (Array) => characters.
  #
  # The block receives a character and must return the C expression to
  # store for it. It is called for every last byte between the group's
  # smallest and largest last byte, including values that have no
  # character in the group. No table is emitted when all values of a
  # group are identical.
  def generate_char_convert_tables(type, return_type, byte_size_groups)
    # Pointer types ("... *") are written directly before the name.
    if return_type.end_with?("*")
      space = ""
    else
      space = " "
    end
    byte_size_groups.keys.sort.each do |common_bytes|
      chars = byte_size_groups[common_bytes]
      lines = []
      all_values = []
      last_bytes = chars.collect {|char| char.bytes.last}
      # Eight values per output line.
      last_bytes.min.step(last_bytes.max).each_slice(8) do |slice|
        values = slice.collect do |last_byte|
          char = (common_bytes + [last_byte]).pack("c*")
          char.force_encoding("UTF-8")
          yield(char)
        end
        all_values.concat(values)
        lines << ("  " + values.join(", "))
      end

      next if all_values.uniq.size == 1

      @output.puts(<<-TABLE_HEADER)

static #{return_type}#{space}#{table_name(type, common_bytes)}[] = {
      TABLE_HEADER
      @output.puts(lines.join(",\n"))
      @output.puts(<<-TABLE_FOOTER)
};
      TABLE_FOOTER
    end
  end

  # Emits a function that switches on the common bytes of its argument.
  #
  # type:: Used to build the function name.
  # argument_list:: C parameter list of the function.
  # char_variable:: Name of the parameter whose bytes are inspected.
  # default:: C expression returned at the end of the function.
  # return_type:: C return type.
  # byte_size_groups:: Hash of common bytes (Array) => characters.
  # options:: With a truthy :internal the function is declared
  #           "static inline".
  #
  # The block emits the code for one group. It is called with
  # (:no_common_bytes, indent, chars, chars_bytes) for the group whose
  # characters are one byte long, and with (:have_common_bytes, indent,
  # chars, chars_bytes, n, common_bytes) for the others, where n is the
  # index of the last byte.
  #
  # NOTE(review): the footer always closes a "  }" block. The matching
  # opening appears to come either from the "  {" emitted below or from
  # the "} else {" that a block emits for :no_common_bytes -- confirm
  # for every caller.
  def generate_char_convert_function(type,
                                     argument_list,
                                     char_variable,
                                     default,
                                     return_type,
                                     byte_size_groups,
                                     options={})
    modifier = options[:internal] ? "static inline " : ""
    @output.puts(<<-HEADER)

#{modifier}#{return_type}
#{function_name(type)}(#{argument_list})
{
    HEADER

    # Common bytes of the previously processed group; compared with the
    # current group to decide which switch statements can be reused.
    prev_common_bytes = []
    # Number of switch statements that are currently open.
    switch_depth = 0
    first_group = true
    byte_size_groups.keys.sort.each do |common_bytes|
      chars = byte_size_groups[common_bytes]
      chars_bytes = chars.collect(&:bytes).sort
      # NOTE(review): min and max are assigned here but not read in
      # this method.
      min = chars_bytes.first.last
      max = chars_bytes.last.last
      if common_bytes.empty?
        indent = "  "
        yield(:no_common_bytes, indent, chars, chars_bytes)
      else
        if first_group
          @output.puts(<<-BODY)
  {
          BODY
        end

        found_different_byte = false
        common_bytes.each_with_index do |common_byte, i|
          # Skip the leading bytes shared with the previous group: their
          # "case" labels are already open.
          unless found_different_byte
            if prev_common_bytes[i] == common_byte
              next
            end
            found_different_byte = true
          end
          indent = "  " * i
          # p [type, i, prev_common_bytes.collect{|x| "%#04x" % x}, common_bytes.collect{|x| "%#04x" % x}, "%#04x" % common_byte, switch_depth]
          # TODO: The following code may be able to be simplified.
          if prev_common_bytes[i].nil?
            # The previous group had no byte at this position: open a
            # new switch.
            # p nil
            switch_depth += 1
            @output.puts(<<-BODY)
    #{indent}switch (#{char_variable}[#{i}]) {
            BODY
          elsif i < switch_depth - 1
            # A deeper switch is still open: close it before the next
            # "case" of this level.
            # p :prev
            switch_depth -= 1
            @output.puts(<<-BODY)
    #{indent}  default :
    #{indent}    break;
    #{indent}  }
    #{indent}  break;
            BODY
          elsif i == switch_depth
            # No switch is open at this position yet: open one.
            # p :common_prev
            switch_depth += 1
            @output.puts(<<-BODY)
    #{indent}switch (#{char_variable}[#{i}]) {
            BODY
          else
            # Close one switch for every byte the previous group had
            # beyond the length of the current one.
            # p [:else, prev_common_bytes.size, common_bytes.size + 1, switch_depth]
            prev_common_bytes.size.downto(common_bytes.size + 1) do |j|
              sub_indent = "  " * (j - 1)
              switch_depth -= 1
              @output.puts(<<-BODY)
    #{indent}#{sub_indent}default :
    #{indent}#{sub_indent}  break;
    #{indent}#{sub_indent}}
    #{indent}#{sub_indent}break;
              BODY
            end
          end
          @output.puts(<<-BODY)
    #{indent}case #{"%#04x" % common_byte} :
          BODY
        end

        # Index of the last byte of the characters in this group.
        n = chars_bytes.first.size - 1
        indent = "    " + ("  " * common_bytes.size)
        yield(:have_common_bytes, indent, chars, chars_bytes, n, common_bytes)
      end

      prev_common_bytes = common_bytes
      first_group = false
    end

    # p [prev_common_bytes.collect{|x| "%#04x" % x}]

    # Close one switch per common byte of the last group, innermost
    # first.
    (prev_common_bytes.size - 1).step(0, -1) do |i|
      indent = "  " * i
      @output.puts(<<-BODY)
    #{indent}default :
    #{indent}  break;
    #{indent}}
      BODY
      if i > 0
        @output.puts(<<-BODY)
    #{indent}break;
        BODY
      end
    end

    @output.puts(<<-FOOTER)
  }

  return #{default};
}
    FOOTER
  end

  # Emits the tables and the function for a per-character conversion.
  #
  # type:: Used to build the table names.
  # function_type:: Used to build the function name.
  # char_map:: Hash whose keys are the characters to handle.
  # default:: C expression returned for unhandled input.
  # return_type:: C return type (also the table element type).
  # options:: Passed to generate_char_convert_function.
  #
  # The block receives a character and must return the C expression for
  # its value.
  def generate_char_converter(type,
                              function_type,
                              char_map,
                              default,
                              return_type,
                              options={},
                              &converter)
    # Group the characters by all of their bytes except the last one.
    byte_size_groups = char_map.keys.group_by do |from|
      bytes = from.bytes
      bytes[0..-2]
    end

    generate_char_convert_tables(type,
                                 return_type,
                                 byte_size_groups,
                                 &converter)

    char_variable = "utf8"
    generate_char_convert_function(function_type,
                                   "const unsigned char *#{char_variable}",
                                   char_variable,
                                   default,
                                   return_type,
                                   byte_size_groups,
                                   options) do |state, *args|
      case state
      when :no_common_bytes
        # One-byte characters: handled by an "if (byte < 0x80)" whose
        # "else" branch is left open for the code that follows.
        indent, chars, chars_bytes = args
        if chars.size == 1
          char = chars[0]
          char_byte = chars_bytes.first.first
          value = yield(char)
          @output.puts(<<-BODY)
#{indent}if (#{char_variable}[0] < 0x80) {
#{indent}  if (#{char_variable}[0] == #{"%#04x" % char_byte}) {
#{indent}    return #{value};
#{indent}  } else {
#{indent}    return #{default};
#{indent}  }
#{indent}} else {
          BODY
        else
          min = chars_bytes.first.first
          max = chars_bytes.last.first
          @output.puts(<<-BODY)
#{indent}if (#{char_variable}[0] < 0x80) {
#{indent}  if (#{char_variable}[0] >= #{"%#04x" % min} &&
#{indent}      #{char_variable}[0] <= #{"%#04x" % max}) {
#{indent}    return #{table_name(type, [])}[#{char_variable}[0] - #{"%#04x" % min}];
#{indent}  } else {
#{indent}    return #{default};
#{indent}  }
#{indent}} else {
          BODY
        end
      when :have_common_bytes
        indent, chars, chars_bytes, n, common_bytes = args
        if chars.size == 1
          char = chars[0]
          char_byte = chars_bytes.first.last
          value = yield(char)
          @output.puts(<<-BODY)
#{indent}if (#{char_variable}[#{n}] == #{"%#04x" % char_byte}) {
#{indent}  return #{value};
#{indent}}
#{indent}break;
          BODY
        else
          # NOTE(review): sorted_chars is assigned but not read.
          sorted_chars = chars.sort
          min = chars_bytes.first.last
          max = chars_bytes.last.last
          # Same values as generate_char_convert_tables computes, used
          # to decide between a constant and a table lookup.
          all_values = (min..max).collect do |last_byte|
            char = (common_bytes + [last_byte]).pack("c*")
            char.force_encoding("UTF-8")
            yield(char)
          end
          if all_values.uniq.size == 1
            value = all_values.first
          else
            value = "#{table_name(type, common_bytes)}[#{char_variable}[#{n}] - #{"%#04x" % min}]"
          end
          last_n_bits_for_char_in_utf8 = 6
          max_n_chars_in_byte = 2 ** last_n_bits_for_char_in_utf8
          # When the range has 64 entries the range check is omitted.
          if all_values.size == max_n_chars_in_byte
            @output.puts(<<-BODY)
#{indent}return #{value};
            BODY
          else
            @output.puts(<<-BODY)
#{indent}if (#{char_variable}[#{n}] >= #{"%#04x" % min} &&
#{indent}    #{char_variable}[#{n}] <= #{"%#04x" % max}) {
#{indent}  return #{value};
#{indent}}
#{indent}break;
            BODY
          end
        end
      end
    end
  end

  # Emits the "char_type" function.
  #
  # block_codes:: Hash of character => char type. While walking the
  #               sorted keys, every code point from one key up to (but
  #               excluding) the next key is assigned the type of the
  #               former, unless that type is the default
  #               "GRN_CHAR_OTHERS".
  #
  # Raises RuntimeError when the type of the last key is not the
  # default.
  def generate_blockcode_char_type(block_codes)
    default = "GRN_CHAR_OTHERS"

    char_types = {}
    current_type = default
    prev_char = nil
    block_codes.keys.sort.each do |char|
      type = block_codes[char]
      if current_type != default
        prev_code_point = prev_char.codepoints[0]
        code_point = char.codepoints[0]
        (prev_code_point...code_point).each do |target_code_point|
          target_char = [target_code_point].pack("U*")
          char_types[target_char] = current_type
        end
      end
      current_type = type
      prev_char = char
    end
    unless current_type == default
      raise "TODO: Consider the max unicode character"
      # NOTE(review): the lines below cannot be reached because of the
      # raise above.
      max_unicode_char = "\u{10ffff}"
      (prev_char..max_unicode_char).each do |target_char|
        char_types[target_char] = current_type
      end
    end

    generate_char_converter("char_type",
                            "char_type",
                            char_types,
                            default,
                            "grn_char_type") do |char|
      char_types[char] || default
    end
  end

  # Emits the "decompose" function. Each value is a C string literal
  # made of \xXX escapes of the destination bytes, or "NULL" for a
  # character without a mapping.
  def generate_decompose(decompose_map)
    default = "NULL"
    generate_char_converter("decompose",
                            "decompose",
                            decompose_map,
                            default,
                            "const char *") do |from|
      to = decompose_map[from]
      if to
        escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
        "\"#{escaped_value}\""
      else
        default
      end
    end
  end

  # Emits the "compose" function and one internal helper per suffix
  # character.
  #
  # Every source pattern is split into a prefix and a suffix character.
  # When a pattern does not consist of exactly two characters a caution
  # is printed and the method returns without emitting anything.
  def generate_compose(compose_map)
    # require "pp"
    # p compose_map.size
    # pp compose_map.keys.group_by {|x| x.chars[1]}.size
    # pp compose_map.keys.group_by {|x| x.chars[1]}.collect {|k, vs| [k, k.codepoints, vs.size, vs.group_by {|x| x.chars[0].bytesize}.collect {|k2, vs2| [k2, vs2.size]}]}
    # pp compose_map.keys.group_by {|x| x.chars[0].bytesize}.collect {|k, vs| [k, vs.size]}
    # pp compose_map

    # suffix_char_map: suffix character => {prefix character => destination}
    suffix_char_map = {}
    compose_map.each do |source, destination|
      chars = source.chars
      if chars.size != 2
        STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
        return
      end
      prefix, suffix = chars
      suffix_char_map[suffix] ||= {}
      suffix_char_map[suffix][prefix] = destination
    end

    # One "static inline" function per suffix that maps a prefix to the
    # composed string.
    suffix_char_map.each do |suffix, prefix_char_map|
      suffix_bytes = suffix.bytes.collect {|byte| "%02x" % byte}.join("")
      default = "NULL"
      generate_char_converter("compose_prefix_#{suffix_bytes}",
                              "compose_prefix_#{suffix_bytes}",
                              prefix_char_map,
                              default,
                              "const char *",
                              :internal => true) do |prefix|
        to = prefix_char_map[prefix]
        if to
          escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
          "\"#{escaped_value}\""
        else
          default
        end
      end
    end


    # The public function dispatches on the suffix bytes and calls the
    # per-suffix function with the prefix.
    char_variable = "suffix_utf8"
    argument_list =
      "const unsigned char *prefix_utf8, " +
      "const unsigned char *#{char_variable}"
    default = "NULL"
    byte_size_groups = suffix_char_map.keys.group_by do |from|
      bytes = from.bytes
      bytes[0..-2]
    end
    generate_char_convert_function("compose",
                                   argument_list,
                                   char_variable,
                                   default,
                                   "const char *",
                                   byte_size_groups) do |type, *args|
      case type
      when :no_common_bytes
        indent, chars, chars_bytes = args
        @output.puts(<<-BODY)
#{indent}switch (#{char_variable}[0]) {
        BODY
        chars.each do |char|
          suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
          type = "compose_prefix_#{suffix_bytes}"
          @output.puts(<<-BODY)
#{indent}case #{"%#04x" % char.bytes.last} :
#{indent}  return #{function_name(type)}(prefix_utf8);
          BODY
        end
        @output.puts(<<-BODY)
#{indent}default :
#{indent}  return #{default};
#{indent}}
#{indent}break;
        BODY
      when :have_common_bytes
        indent, chars, chars_bytes, n, common_bytes = args
        @output.puts(<<-BODY)
#{indent}switch (#{char_variable}[#{n}]) {
        BODY
        chars.each do |char|
          suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
          type = "compose_prefix_#{suffix_bytes}"
          @output.puts(<<-BODY)
#{indent}case #{"%#04x" % char.bytes.last} :
#{indent}  return #{function_name(type)}(prefix_utf8);
          BODY
        end
        @output.puts(<<-BODY)
#{indent}default :
#{indent}  return #{default};
#{indent}}
#{indent}break;
        BODY
      end
    end
  end

  # Converts a character-keyed hash into nested hashes keyed by byte:
  # one level per leading byte, with the last byte mapping to the
  # original value.
  #
  # NOTE(review): no caller of this method is visible in this file.
  def to_bytes_map(char_map)
    bytes_map = {}
    char_map.each_key do |from|
      parent = bytes_map
      from.bytes[0..-2].each do |byte|
        parent[byte] ||= {}
        parent = parent[byte]
      end
      parent[from.bytes.last] = char_map[from]
    end
    bytes_map
  end
end

# Runs "./icudump --<option>" and builds a Hash of character => code
# from its output.
#
# Each output line is split on tabs: the first column holds the bytes
# of the character as colon-separated hexadecimal numbers, the third
# column holds the code. The second column is ignored.
#
# option:: Option name passed to icudump without the leading "--".
#
# Returns a Hash whose keys are UTF-8 strings.
def create_bc(option)
  bc = {}
  # IO.popen with a block closes the pipe and reaps the child process.
  # The "|command" form of Kernel#open is deprecated since Ruby 3.3 and
  # left the pipe open.
  IO.popen(["./icudump", "--#{option}"]) do |io|
    io.each_line do |line|
      src, _, code = line.chomp.split("\t")
      str = src.split(':').collect(&:hex).pack("c*")
      str.force_encoding("UTF-8")
      bc[str] = code
    end
  end
  bc
end

# Stores dst in the nested hash under the path given by src.
#
# hash:: Nested Hash; one level per element of src. The value for a
#        complete path is stored under the key nil of the last level.
# src:: Array of path elements. It is consumed (emptied) by this call.
# dst:: Value to store.
#
# Returns dst.
def ccpush(hash, src, dst)
  node = hash
  loop do
    key = src.shift
    # No element left: this level is the end of the path.
    return node[key] = dst unless key
    node = (node[key] ||= {})
  end
end

# Replaces the first pattern of the nested hash found in str.
#
# hash:: Nested Hash as built by ccpush: one level per character, with
#        the replacement stored under the key nil.
# str:: String to scan.
#
# The start positions are tried from left to right. For the first
# position where any pattern matches, the longest matching pattern is
# replaced and the new string is returned. When nothing matches, str
# itself is returned.
def subst(hash, str)
  chars = str.chars
  last_index = chars.size - 1
  0.upto(last_index) do |start|
    node = hash
    replaced = nil
    start.upto(last_index) do |stop|
      node = node[chars[stop]]
      break unless node
      next unless node[nil]
      # A longer match found later overwrites this candidate.
      replaced = chars[0, start].join("") + node[nil] + chars[stop + 1..-1].join("")
    end
    return replaced if replaced
  end
  str
end

# Returns the lowercase form of character, with a special case for the
# dotted capital I.
#
# Ruby lowercases both "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
# and "U+0049 LATIN CAPITAL LETTER I" + "U+0307 COMBINING DOT ABOVE" to
# "U+0069 LATIN SMALL LETTER I" + "U+0307 COMBINING DOT ABOVE". For
# these two inputs plain "U+0069 LATIN SMALL LETTER I" is returned
# instead.
def downcase(character)
  lowered = character.downcase
  return lowered unless lowered == "\u0069\u0307"

  dotted_capital_i_forms = ["\u0130", "\u0049\u0307"]
  dotted_capital_i_forms.include?(character) ? "\u0069" : lowered
end

# Normalizes dst and registers it as the decomposition of src.
#
# decompose:: Hash that receives src => normalized dst.
# cc:: Nested Hash of compose patterns, as used by subst.
# src:: Source string.
# dst:: Raw decomposed string.
#
# dst is lowercased unless $case_sensitive is set, then patterns from
# cc are substituted until the string no longer changes, and finally
# leading spaces are removed unless $keep_space is set. Nothing is
# registered when the result equals src.
def map_entry(decompose, cc, src, dst)
  dst = downcase(dst) unless $case_sensitive
  # Repeat the substitution until a fixed point is reached.
  while (composed = subst(cc, dst)) != dst
    dst = composed
  end
  unless $keep_space
    trimmed = dst[/^ +([^ ].*)$/, 1]
    dst = trimmed if trimmed
  end
  decompose[src] = dst unless src == dst
end

# Builds the Hash of source string => decomposed (normalized) string.
#
# The data comes from three sources, processed in this order:
# 1. "./icudump --cc": compose patterns, collected into a nested Hash
#    (see ccpush) that map_entry uses to re-compose the destinations.
# 2. "./icudump --nfkd": the decompositions themselves.
# 3. CUSTOM_RULE_PATH, when readable: additional tab-separated
#    "source<TAB>destination" rules.
#
# Unless $case_sensitive is set, every character whose lowercase form
# differs from itself and that has no entry yet is mapped to its
# lowercase form.
def create_decompose_map()
  cc = {}
  # IO.popen with a block closes the pipe and reaps the child process.
  # The "|command" form of Kernel#open is deprecated since Ruby 3.3 and
  # left the pipe open.
  IO.popen(["./icudump", "--cc"]) do |io|
    io.each_line do |line|
      _, src, dst = line.chomp.split("\t")
      if cc[src]
        STDERR.puts "caution: ambiguous mapping #{src}|#{cc[src]}|#{dst}" if cc[src] != dst
      end
      ccpush(cc, src.chars, dst)
    end
  end
  decompose_map = {}
  IO.popen(["./icudump", "--nfkd"]) do |io|
    io.each_line do |line|
      _, src, dst = line.chomp.split("\t")
      map_entry(decompose_map, cc, src, dst)
    end
  end
  if File.readable?(CUSTOM_RULE_PATH)
    # The block form closes the file once all rules are read.
    File.open(CUSTOM_RULE_PATH) do |file|
      file.each_line do |line|
        src, dst = line.chomp.split("\t")
        map_entry(decompose_map, cc, src, dst)
      end
    end
  end
  unless $case_sensitive
    (0x1..0x110000).each do |code_point|
      char = [code_point].pack("U")
      # Skips values that do not form a valid UTF-8 string.
      next unless char.valid_encoding?
      downcased_char = downcase(char)
      next if char == downcased_char
      decompose_map[char] ||= downcased_char
    end
  end
  return decompose_map
end

# Builds the Hash of source string => composed string from the output
# of "./icudump --cc".
#
# decompose_map:: Hash as returned by create_decompose_map. Every
#                 character of a source and the destination as a whole
#                 are replaced by their entry in this map, when there
#                 is one.
#
# After reading, sources are rewritten repeatedly: a proper substring
# of a source that is itself a source is replaced by its destination.
# This is repeated until a pass performs no substitution. Diagnostics
# and progress are printed to STDERR.
def create_compose_map(decompose_map)
  cc = {}
  # IO.popen with a block closes the pipe and reaps the child process.
  # The "|command" form of Kernel#open is deprecated since Ruby 3.3 and
  # left the pipe open.
  IO.popen(["./icudump", "--cc"]) do |io|
    io.each_line do |line|
      _, src, dst = line.chomp.split("\t")
      src = src.chars.collect{|c| decompose_map[c] || c}.join
      dst = decompose_map[dst] || dst
      if cc[src] && cc[src] != dst
        STDERR.puts("caution: inconsistent mapping '#{src}' => '#{cc[src]}'|'#{dst}'")
      end
      cc[src] = dst if src != dst
    end
  end
  loop {
    noccur = 0
    cc2 = {}
    cc.each {|src,dst|
      src2 = src
      chars = src.chars
      last = chars.size - 1
      (0..last).each do |i|
        (i..last).each do |j|
          # The whole source is not a candidate for substitution.
          next if i == 0 && j == last
          str = chars[i..j].join
          if decompose_map[str]
            STDERR.printf("caution: recursive mapping '%s'=>'%s'\n",
                          str, decompose_map[str])
          end
          if cc[str]
            src2 = (i > 0 ? chars[0..i-1].join : '') + cc[str] + (j < last ? chars[j+1..last].join : '')
            noccur += 1
          end
        end
      end
      cc2[src2] = dst if src2 != dst
    }
    cc = cc2
    STDERR.puts("substituted #{noccur} patterns.")
    break if noccur == 0
    STDERR.puts('try again..')
  }
  return cc
end

# Returns the license text from the comment block at the top of this
# file, prepared for embedding in a C comment: the "#" marks and the
# shebang line are removed, surrounding whitespace is stripped and
# every line that starts with a word character is indented by two
# spaces.
def license_header
  source = File.read(__FILE__)
  # Everything before the first line that does not start with "#".
  header_comment = source.split(/^[^#]/, 2)[0]
  without_comment_marks = header_comment.gsub(/^# ?/, "")
  # The first line is what remains of the shebang.
  without_shebang = without_comment_marks.gsub(/\A.*$/, "")
  without_shebang.strip.gsub(/^(\w)/, "  \\1")
end

######## main #######

# Command line handling.
#
#   --source-directory=DIR  directory that contains icudump.c and that
#                           receives the generated nfkc<version>.c
#   --impl=switch|table     generator implementation (default: table)
#   arguments starting with optional dashes and "c" (e.g. -c)
#                           set $case_sensitive
#   arguments starting with optional dashes and "s" (e.g. -s)
#                           set $keep_space
generator_class = TableGenerator
source_directory = "."
ARGV.each do |arg|
  case arg
  when /\A--source-directory=/
    source_directory = $POSTMATCH
  when "--impl=switch"
    generator_class = SwitchGenerator
  when "--impl=table"
    generator_class = TableGenerator
  # The flag patterns are anchored at the start of the argument.
  # Unanchored, /-*c/i matched every argument that merely contains a
  # "c", so "--impl=switch" enabled $case_sensitive and never selected
  # SwitchGenerator.
  when /\A-*c/i
    $case_sensitive = true
  when /\A-*s/i
    $keep_space = true
  end
end

# Build the icudump helper against the ICU installation in ICU_HOME.
icu_home = ENV["ICU_HOME"] || "/tmp/local"
STDERR.puts("compiling icudump on #{icu_home}")
system("cc",
       "-Wall",
       "-O3",
       "-g3",
       "-o", "icudump",
       "-I#{icu_home}/include",
       "-L#{icu_home}/lib",
       File.join(source_directory, "icudump.c"),
       "-licuuc",
       "-licui18n",
       "-licudata") or exit(false)

# Let the icudump child processes find the ICU shared libraries.
ENV["LD_LIBRARY_PATH"] = "#{icu_home}/lib:#{ENV["LD_LIBRARY_PATH"]}"

STDERR.puts('getting Unicode version')
# The dots are removed so the version can be used in identifiers and in
# the output file name.
unicode_version = `./icudump --version`.strip.gsub(".", "")

STDERR.puts('creating bc..')
bc = create_bc("gc")

STDERR.puts('creating decompose map..')
decompose_map = create_decompose_map()

STDERR.puts('creating compose map..')
compose_map = create_compose_map(decompose_map)

output_path = File.join(source_directory, "nfkc#{unicode_version}.c")
File.open(output_path, "w") do |output|
  output.puts(<<-HEADER)
/* -*- c-basic-offset: 2 -*- */
/*
#{license_header}
*/

/*
  Don't edit this file by hand. it generated automatically by nfkc.rb.
*/

#include "grn.h"
#include "grn_nfkc.h"
#include <groonga/nfkc.h>

#ifdef GRN_WITH_NFKC
  HEADER

  generator = generator_class.new(unicode_version, output)
  generator.generate(bc, decompose_map, compose_map)

  output.puts(<<-FOOTER)

#endif /* GRN_WITH_NFKC */

  FOOTER
end
